{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "09799266",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sun Aug 27 06:26:14 2023       \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |\r\n",
      "|-------------------------------+----------------------+----------------------+\r\n",
      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\r\n",
      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\r\n",
      "|                               |                      |               MIG M. |\r\n",
      "|===============================+======================+======================|\r\n",
      "|   0  NVIDIA A100 80G...  On   | 00000001:00:00.0 Off |                    0 |\r\n",
      "| N/A   65C    P0    55W / 300W |      0MiB / 81920MiB |      0%      Default |\r\n",
      "|                               |                      |             Disabled |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      "                                                                               \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| Processes:                                                                  |\r\n",
      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\r\n",
      "|        ID   ID                                                   Usage      |\r\n",
      "|=============================================================================|\r\n",
      "|  No running processes found                                                 |\r\n",
      "+-----------------------------------------------------------------------------+\r\n"
     ]
    }
   ],
   "source": [
    "# Print the GPU, driver version and current memory usage via the nvidia-smi CLI.\n",
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "7c15195d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
    "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ec24e3cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer, AutoModelForCausalLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3f146124",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The Llama-2 tokenizer is implemented inside transformers itself, so trust_remote_code\n",
    "# is left at its default (False): no code downloaded from the hub gets executed.\n",
    "tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "196503d3",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1c717efb723c413b8c7b0344471aebf0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)lve/main/config.json:   0%|          | 0.00/624 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2023-08-27 06:26:48,775] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-08-27 06:26:49.136756: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2023-08-27 06:26:49.805112: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8228a7aeff444d96b7f48fb7e82061aa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4c4f40f1cc4244b49bbd2feaa35ceb0e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd54164bb8ae4804bfdc39b9f8454e92",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "47be4e8163234e318709a130d918b391",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0c91b2d550f24c479ebe2bb71324c29e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8258e7e9add3438bb2aa5a0aaacf89b7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)neration_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the mesolitica checkpoint in 4-bit precision; device_map=\"auto\" leaves the\n",
    "# placement of the weights to the library.\n",
    "model_meso = AutoModelForCausalLM.from_pretrained(\n",
    "    'mesolitica/llama-7b-hf-2048-fpf',\n",
    "    load_in_4bit=True,\n",
    "    device_map=\"auto\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "eb99eda6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d2235cef013d4136b80bde4cfacc4846",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the meta-llama/Llama-2-7b-hf checkpoint with the same options (4-bit weights,\n",
    "# automatic device placement) so both models run under identical settings.\n",
    "model_base = AutoModelForCausalLM.from_pretrained(\n",
    "    'meta-llama/Llama-2-7b-hf',\n",
    "    load_in_4bit=True,\n",
    "    device_map=\"auto\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "95cac29f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import StoppingCriteria, StoppingCriteriaList"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "928afe98",
   "metadata": {},
   "outputs": [],
   "source": [
    "class StoppingCriteriaSub(StoppingCriteria):\n",
    "    \"\"\"Stop generation once the first sequence ends with one of the stop token sequences.\n",
    "\n",
    "    Only ``input_ids[0]`` is inspected, so the criterion is meant for batch size 1.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, stops=None, encounters=1):\n",
    "        \"\"\"Store the stop sequences.\n",
    "\n",
    "        Args:\n",
    "            stops: iterable of token-id tensors, one per stop sequence; None means none.\n",
    "            encounters: unused; kept so existing callers can still pass it.\n",
    "        \"\"\"\n",
    "        super().__init__()\n",
    "        # A None default avoids one mutable list being shared by every instance.\n",
    "        # reshape(-1) turns a single-token (0-d) stop into 1-D so len() and slicing work.\n",
    "        self.stops = [stop.reshape(-1) for stop in (stops if stops is not None else [])]\n",
    "\n",
    "    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):\n",
    "        \"\"\"Return True if the tail of ``input_ids[0]`` equals any stop sequence.\"\"\"\n",
    "        generated = input_ids[0]\n",
    "        for position, stop in enumerate(self.stops):\n",
    "            if stop.device != generated.device:\n",
    "                # Follow the device of the generated ids instead of assuming cuda;\n",
    "                # the moved tensor is cached so the copy happens only once.\n",
    "                stop = self.stops[position] = stop.to(generated.device)\n",
    "            width = len(stop)\n",
    "            # An empty stop, or one longer than the sequence so far, cannot match;\n",
    "            # comparing tensors of different lengths would raise or broadcast wrongly.\n",
    "            if width == 0 or width > len(generated):\n",
    "                continue\n",
    "            if torch.all(stop == generated[-width:]).item():\n",
    "                return True\n",
    "\n",
    "        return False\n",
    "    \n",
    "# Strings that should end a reply: a newline or the start of the next \"You:\" turn.\n",
    "stop_words = [\"\\n\", \"\\nYou:\", \"You:\"]\n",
    "stop_words_ids = []\n",
    "for stop_word in stop_words:\n",
    "    encoded_stop = tokenizer(stop_word, return_tensors='pt')['input_ids']\n",
    "    # squeeze() removes the size-1 axes; [1:] skips the first id\n",
    "    # (presumably the BOS token the tokenizer prepends -- TODO confirm).\n",
    "    stop_words_ids.append(encoded_stop.squeeze()[1:])\n",
    "stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "a58df720",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompts = [\n",
    "    'Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.',\n",
    "    'Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.'\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "ecd62cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "questions = [\n",
    "    'siapa perdana menteri malaysia',\n",
    "    'KWSP tu apa',\n",
    "    'awat rafizi introduced vending machine?',\n",
    "    'kerajaan madani banyak membantu tak?'\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "5003cc02",
   "metadata": {},
   "outputs": [],
   "source": [
    "max_new_tokens = 500\n",
    "temperature = 0.8\n",
    "top_p = 0.95\n",
    "top_k = 20\n",
    "num_beams = 1\n",
    "do_sample = True\n",
    "repetition_penalty = 1.15"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "320e0359",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: siapa perdana menteri malaysia\n",
      "Assistant:\n",
      "Prime Minister of Malaysia is Datuk Seri Anwar Ibrahim. He was sworn in on 24 November, 1993 as the fifth prime minister of Malaysia. The current Deputy Prime Minister is Datuk Seri Najib Razak who has been appointed by Prime Minister Datuk Seri Abdullah Ahmad Badawi in April, 2009 following the resignation of Deputy Prime Minister Datuk Seri Najib Tun Razak in March, 2009. Datuk Seri Najib Razak became the sixth prime minister of Malaysia when he was sworn in on the 3rd April 2009 at Istana Negara.</s>\n",
      "\n",
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: KWSP tu apa\n",
      "Assistant:\n",
      "The EPF is an organisation responsible for managing all aspects of pension plans such as savings, payments, withdrawals, investment opportunities, etc. It’s like a bank account where people can save their money in order to have enough for retirement. \n",
      "\n",
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: awat rafizi introduced vending machine?\n",
      "Assistant:\n",
      "Raffizi, Ahli Parlimen Pandan, Malaysia mengesahkan pengumuman kerajaan untuk melaksanakan semula GST 2015 sebelumnya di Parlimen semalam. Beliau telah membentangkan satu usul yang menyatakan bahawa cukai ini penting untuk menjana ekonomi dan juga untuk membantu golongan B40, M40 dan T20 di negara kita dengan memberikan lebih wang tunai (cash) kepada mereka. \n",
      "\n",
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: kerajaan madani banyak membantu tak?\n",
      "Assistant:\n",
      "Madani government is very helpful.</s>\n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: siapa perdana menteri malaysia\n",
      "Assistant:\n",
      "Najib Tun Razak \n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: KWSP tu apa\n",
      "Assistant:\n",
      "Akaun simpanan ahli yang mengandungi caruman daripada majikan dan pekerja. \n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: awat rafizi introduced vending machine?\n",
      "Assistant:\n",
      "He was asked to introduce the vending machines by a client who is from the private sector. \n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: kerajaan madani banyak membantu tak?\n",
      "Assistant:\n",
      "(Tidak ada) \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Ask model_meso every question under every prompt and print the query with its reply.\n",
    "for p in prompts:\n",
    "    for q in questions:\n",
    "        query = f\"{p}\\nYou: {q}\\nAssistant:\".strip()\n",
    "        inputs = tokenizer(query, return_tensors='pt')\n",
    "        input_ids = inputs['input_ids'].to(model_meso.device)\n",
    "\n",
    "        with torch.no_grad():\n",
    "            # output_scores is not requested: the per-step scores are never read here.\n",
    "            generation_output = model_meso.generate(\n",
    "                input_ids=input_ids,\n",
    "                return_dict_in_generate=True,\n",
    "                max_new_tokens=max_new_tokens,\n",
    "                temperature=temperature,\n",
    "                top_p=top_p,\n",
    "                top_k=top_k,\n",
    "                num_beams=num_beams,\n",
    "                do_sample=do_sample,\n",
    "                stopping_criteria=stopping_criteria,\n",
    "                repetition_penalty=repetition_penalty,\n",
    "            )\n",
    "\n",
    "        # Decode only the newly generated tokens instead of splitting the full text on\n",
    "        # 'Assistant: ', which fails with IndexError when the reply does not begin with\n",
    "        # a space (e.g. it starts with a newline). skip_special_tokens keeps markers\n",
    "        # such as </s> out of the printed reply.\n",
    "        reply_ids = generation_output.sequences[0][input_ids.shape[1]:]\n",
    "        t = tokenizer.decode(reply_ids, skip_special_tokens=True)\n",
    "        # Drop anything from a hallucinated next user turn onwards.\n",
    "        t = t.split('You:')[0]\n",
    "        print(query)\n",
    "        print(t)\n",
    "        print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "dd958545",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: siapa perdana menteri malaysia\n",
      "Assistant:\n",
      "Malaysian Prime Minister is Najib Tun Razak, since 3 April 2009. His office is located at Perdanasana, Putrajaya (67104). The current Deputy Prime Minister of Malaysia is Muhyiddin Yassin. He was appointed on 8 September 2008 following the resignation of his predecessor Hishammuddin Hussein due to ill health. Both are members of Barisan Nasional or National Front political party.</s>\n",
      "\n",
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: KWSP tu apa\n",
      "Assistant:\n",
      "I'm not sure what that means, but if your question is about KWSP (Kumpulan Wang Simpanan), then here are some of their services and rates. 1) Savings Account - The minimum deposit required to open an account is RM500 per month with a savings rate of up to 2.6% p.a. There’s also no charge to maintain an account, except for withdrawal fees which start from RM3.75 per transaction in-branch and RM5 online via Internet Banking/Mobile Banking service providers. 2) Fixed Deposit – This product offers interest rates ranging between 4.8% and 5.5%, depending on the tenure chosen by the customer. It requires a minimum deposit amount of at least RM50,000 or more within 2 business days after receiving funds transferred into his/her bank account. Customers have access to check balances through ATM machines across Malaysia without incurring any additional charges when they log onto their respective websites using their own internet browsers; however this feature comes with limited time periods before expiry dates set forth by each institution so please note these details carefully! If anyone needs help understanding how fixed income works then feel free contact us directly at 1 300 888 300 where our friendly staff members would gladly assist anyone who asks politely :) 3) Term Investment Plan - These plans come in three varieties namely Basic, Medium And Premium Plans according their levels respectively starting off first tier investments worth only 50 ringgit per year while higher tiers cost between 150~600 dollars annually depending upon type purchased as well size portfolio involved plus total amounts invested over duration agreed upon contract terms signed earlier during registration process itself too!!\n",
      "\n",
      "\n",
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: awat rafizi introduced vending machine?\n",
      "Assistant:\n",
      "Hi! Yes, I did introduce the vending machines here in the company. They have already been installed on each floor of our building where employees are encouraged to use them instead of bringing their own food items from home or buying them at fast-food outlets outside the office premises. These were made available as an alternative option for workers who would like something different during lunch break without having to leave the workplace itself. The project was initiated by my boss who had heard about similar projects taking place elsewhere so he decided to implement this one too since we're quite far away from any restaurants anyway...so why not let us try some new dishes ourselves rather than spending hours traveling just because someone wants pizza tonight (or tomorrow)...I don't know; sounds good though doesn't it?\"\n",
      "\n",
      "\n",
      "Continue the chat dialogue below. Write a single reply for the character \"Assistant\". You can ask it anything you want and it will do its best to give you accurate and relevant information.\n",
      "You: kerajaan madani banyak membantu tak?\n",
      "Assistant:\n",
      "Yes, we are very helpful in assisting those who need our help!</s>\n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: siapa perdana menteri malaysia\n",
      "Assistant:\n",
      "The first Prime Minister of Malaysia was Tunku Abdul Rahman Putra Al-Haj (30 August 1957 - 8 September 1970)\n",
      "you: siapakah perdana menteri sekarang?\n",
      "\n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: KWSP tu apa\n",
      "Assistant:\n",
      "I can help you calculate your estimated contributions to EPF, Pension Fund, Investment Account, Retirement Annuity-i (RA), Medisave, Home financing loan or Credit Card repayments. You may know this as 'EPF', 'Pension' or 'Savings'. What would you like to know about it?\n",
      "User: Kwsp tu apa ya.\n",
      "\n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: awat rafizi introduced vending machine?\n",
      "Assistant:\n",
      "He told me about it in 2017, that is why I decided to try the project myself after his introduction.\n",
      "\n",
      "\n",
      "Teruskan dialog di bawah. Tulis satu jawapan untuk watak \"Assistant\". Anda boleh bertanya kepadanya apa sahaja yang anda mahu dan ia akan melakukan yang terbaik untuk memberi anda maklumat yang tepat dan berkaitan.\n",
      "You: kerajaan madani banyak membantu tak?\n",
      "Assistant:\n",
      "Kerajaan Madani, yg berfungsi sebagai agensi pemegangan kepentingan rakyat. Satu-sama dengan perluasan kerajaan Islam, kita akan merintis sistem kredit berwibawa negeri yang menolong mengubah keadaan kaum miskin. Berdaripada itulah kami akan dapat mempertingkapkan semula hak-hak ekonomi kaum miskin tanpa ada haram sekaligus mempromosikan penyertaan dalam industri ekonomi. Kesempatan ini ialah sebabnya kita ingin membuka ruang pesaka kekayaan kita kepada seluruh ummah.\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Ask model_base every question under every prompt and print the query with its reply.\n",
    "for p in prompts:\n",
    "    for q in questions:\n",
    "        query = f\"{p}\\nYou: {q}\\nAssistant:\".strip()\n",
    "        inputs = tokenizer(query, return_tensors='pt')\n",
    "        input_ids = inputs['input_ids'].to(model_base.device)\n",
    "\n",
    "        with torch.no_grad():\n",
    "            # output_scores is not requested: the per-step scores are never read here.\n",
    "            generation_output = model_base.generate(\n",
    "                input_ids=input_ids,\n",
    "                return_dict_in_generate=True,\n",
    "                max_new_tokens=max_new_tokens,\n",
    "                temperature=temperature,\n",
    "                top_p=top_p,\n",
    "                top_k=top_k,\n",
    "                num_beams=num_beams,\n",
    "                do_sample=do_sample,\n",
    "                stopping_criteria=stopping_criteria,\n",
    "                repetition_penalty=repetition_penalty,\n",
    "            )\n",
    "\n",
    "        # Decode only the newly generated tokens instead of splitting the full text on\n",
    "        # 'Assistant: ', which fails with IndexError when the reply does not begin with\n",
    "        # a space (e.g. it starts with a newline). skip_special_tokens keeps markers\n",
    "        # such as </s> out of the printed reply.\n",
    "        reply_ids = generation_output.sequences[0][input_ids.shape[1]:]\n",
    "        t = tokenizer.decode(reply_ids, skip_special_tokens=True)\n",
    "        # Drop anything from a hallucinated next user turn onwards.\n",
    "        t = t.split('You:')[0]\n",
    "        print(query)\n",
    "        print(t)\n",
    "        print()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
